library(foreign)
library(haven)
library(tidyverse)
library(dplyr)
library(plyr)
library(plm)
library(multiwayvcov)
library(lmtest)
library(car)
library(PanelMatch)
library(estimatr)

#############
##FUNCTIONS##
#############

rmse_calculator <- function(i, type) {
  file <- paste0(type,"_model_paper", i, ".rds")
  m <- readRDS(file)[[1]]
  rmse <- sqrt(sum(m$residuals^2)/m$df.residual)
  n <- pdim(m)$nT$N
  return(data.frame(model=i, rmse=rmse, n=n))
}

lags <- function(data, var) {
  n_lags <- length(data[,var])-1
  if(n_lags>0) {
    for(i in 1:n_lags) {
      data[,paste0(var,"_l",i)] <- c(rep(NA,i), data[1:(length(data[,var])-i),var])
    }
  }
  return(data)
}

############
##ANALYSIS##
############

#Set working directory to local data folder#

workingDirectory <- "/Users/ericmcghee/Dropbox/PPIC/UCF/Post-election Covid/academic paper/data"

setwd(workingDirectory)

d <- read_csv("pandemic_election_formatted_data_turnout.csv")

#Set working directory to local output folder#

workingDirectory <- "/Users/ericmcghee/Dropbox/PPIC/UCF/Post-election Covid/academic paper/analysis"

setwd(workingDirectory)

##DIFFERENCE-DIFFERENCES (Table A9)##
#Table A9, Model 2: diff-in-diff w/ 2020 interaction, county trends, & other reforms:  ELIGIBLE TURNOUT#
m <- plm(pcto.elig ~ yr2020*(vbm_all + vbm_noex + vbm_app + avr + cases_mn_centered + pv_marg) + vbm_perm + edr + early + active + 
           yrcount*as.factor(geoid), 
         filter(d, reg_all>0), index=c("geoid","year"), model="within", effect="twoways")
keep_vars <- which(!str_detect(rownames(summary(m)$coefficients), "as.factor"))
out2 <- as.data.frame(round(coeftest(m, vcovHC, type="HC1", cluster="group")[keep_vars,], 3)) %>% rownames_to_column(var="Variables") %>%
  mutate(index=1:nrow(.)) %>% 
  gather("Type","Value", -c(index, Variables, `t value`, `Pr(>|t|)`)) %>%
  arrange(index)
saveRDS(list(m, out2), "turnout_model_paper2_elig.rds")

#Table A9, Model 1: diff-in-diff w/ 2020 interaction, county trends, & percent vbm interaction: ELIGIBLE TURNOUT#
m <- plm(pcto.elig ~ yr2020*(vbm_all + pcvbm + vbm_noex + vbm_app + avr + cases_mn_centered + pv_marg) + vbm_perm + edr + early + active + 
           yrcount*as.factor(geoid), 
         filter(d, reg_all>0), index=c("geoid","year"), model="within", effect="twoways")
keep_vars <- which(!str_detect(rownames(summary(m)$coefficients), "as.factor"))
out1 <- as.data.frame(round(coeftest(m, vcovHC, type="HC1", cluster="group")[keep_vars,], 3)) %>% rownames_to_column(var="Variables") %>%
  mutate(index=1:nrow(.)) %>% 
  gather("Type","Value", -c(index, Variables, `t value`, `Pr(>|t|)`)) %>%
  arrange(index)
saveRDS(list(m, out1), "turnout_model_paper1_elig.rds")

out.all <- full_join(out1, out2, by=c("Variables","Type"), suffix=c(".1",".2"))

out.all <- out.all[,!str_detect(names(out.all), "t value|Pr|index|Type")]

write.csv(out.all, "turnout_results_table_elig.csv")

#RMSEs for tables#
model_rmse_elig <- ldply(lapply(c("1_elig","2_elig"), function(i,t) rmse_calculator(i,t), "turnout"))
saveRDS(model_rmse_elig, "turnout_model_fits_elig.rds")

##LAGGED DV MODEL (Table A7)##
#Share of eligible#
d2 <- arrange(d, stpost, geoid, year) %>%
  filter(!is.na(geoid)) %>%
  ddply(.(geoid), function(x,y) lags(x,y), "pcto.elig")

m <- lm_robust(pcto.elig ~ vbm_all + vbm_noex + vbm_app + avr + cases_mn_centered + pv_marg + active +
                 pcto.elig_l5 + pcto.elig_l4 + pcto.elig_l3 + pcto.elig_l2 + pcto.elig_l1,
               data=filter(d2, year==2020), clusters=stpost)
summary(m)

out.ldv <- as.data.frame(round(coeftest(m)[1:nrow(coeftest(m)),], 3)) %>% rownames_to_column(var="Variables") %>%
  mutate(index=1:nrow(.)) %>% 
  gather("Type","Value", -c(index, Variables, `t value`, `Pr(>|t|)`)) %>%
  arrange(index)
saveRDS(list(m, out.ldv), "turnout_ldv_paper_elig.rds")

saveRDS(data.frame(model="ldv", rmse=sqrt(m$res_var), n=m$nobs), "turnout_ldv_fit_elig.rds")

##MATCHING MODELS (Tables A4 & A6)##
#Universal VBM: Eligible Turnout#
d <- mutate(d, geoid2=as.integer(geoid), yrcount2=as.integer(yrcount+1)) %>%
  ddply(.(geoid), transform, pre_2020=any(vbm_all[year<2020]))

pm.out <- PanelMatch(lag = 5, time.id = "yrcount2", unit.id = "geoid2", 
                     treatment = "vbm_all", refinement.method = "none", # no refinement 
                     data = filter(d, !(pre_2020)), 
                     match.missing = TRUE, 
                     covs.formula = ~ I(lag(pcto.elig, 1:5)) +  I(lag(pv_marg, 1:5)) +
                       vbm_noex + vbm_app + avr + cases_mn_centered + pv_marg, 
                     size.match = 5, qoi = "att" , outcome.var = "pcto.elig",
                     lead = 0, forbid.treatment.reversal = FALSE, 
                     use.diagonal.variance.matrix = TRUE)

pm.balance.vbm_all.null <- get_covariate_balance(pm.out$att,
                                                 data = filter(d, !(pre_2020)),
                                                 covariates = c("cases_mn_centered","pv_marg","pcto.elig"),
                                                 plot = FALSE)
pm.balance.vbm_all.null
pm.balance.vbm_all.null["t_0","pcto.elig"] <- NA
pm.balance <- cbind(data.frame(dv="vbm_all", model="null"), 
                    value=apply(pm.balance.vbm_all.null, 2, function(x) mean(x, na.rm=T)))

pm.out <- PanelMatch(lag = 5, time.id = "yrcount2", unit.id = "geoid2", 
                     treatment = "vbm_all", refinement.method = "mahalanobis", # use Mahalanobis distance 
                     data = filter(d, !(pre_2020)), 
                     match.missing = TRUE, 
                     covs.formula = ~ I(lag(pcto.elig, 1:5)) +  I(lag(pv_marg, 1:5)) +
                       vbm_noex + vbm_app + avr + cases_mn_centered + pv_marg, 
                     size.match = 5, qoi = "att" , outcome.var = "pcto.elig",
                     lead = 0, forbid.treatment.reversal = FALSE, 
                     use.diagonal.variance.matrix = TRUE)

pm.balance.vbm_all.match <- get_covariate_balance(pm.out$att,
                                                  data = filter(d, !(pre_2020)),
                                                  covariates = c("cases_mn_centered","pv_marg","pcto.elig"),
                                                  plot = FALSE)
pm.balance.vbm_all.match
pm.balance.vbm_all.match["t_0","pcto.elig"] <- NA
pm.balance <- rbind.fill(pm.balance, cbind(data.frame(dv="vbm_all", model="matched"), 
                                           value=apply(pm.balance.vbm_all.match, 2, function(x) mean(x, na.rm=T))))

pe.out.vbm_all.match <- PanelEstimate(sets=pm.out, data=filter(d, !(pre_2020)))
summary(pe.out.vbm_all.match)

#No-excuse VBM: Eligible Turnout#
d <- mutate(d, geoid2=as.integer(geoid), yrcount2=as.integer(yrcount+1)) %>%
  ddply(.(geoid), transform, pre_2020=any(vbm_noex[year<2020]))

pm.out <- PanelMatch(lag = 5, time.id = "yrcount2", unit.id = "geoid2", 
                     treatment = "vbm_noex", refinement.method = "none", # no refinement 
                     data = filter(d, !(pre_2020)), 
                     match.missing = TRUE, 
                     covs.formula = ~ I(lag(pcto.elig, 1:5)) +  I(lag(pv_marg, 1:5)) +
                       vbm_all + vbm_app + avr + cases_mn_centered, 
                     size.match = 5, qoi = "att" , outcome.var = "pcto.elig",
                     lead = 0, forbid.treatment.reversal = FALSE, 
                     use.diagonal.variance.matrix = TRUE)

pm.balance.vbm_noex.null <- get_covariate_balance(pm.out$att,
                                                  data = filter(d, !(pre_2020)),
                                                  covariates = c("cases_mn_centered","pv_marg","pcto.elig"),
                                                  plot = FALSE)
pm.balance.vbm_noex.null
pm.balance.vbm_noex.null["t_0","pcto.elig"] <- NA
pm.balance <- rbind.fill(pm.balance, cbind(data.frame(dv="vbm_noex", model="null"), 
                                           value=apply(pm.balance.vbm_noex.null, 2, function(x) mean(x, na.rm=T))))

pm.out <- PanelMatch(lag = 5, time.id = "yrcount2", unit.id = "geoid2", 
                     treatment = "vbm_noex", refinement.method = "mahalanobis", # use Mahalanobis distance 
                     data = filter(d, !(pre_2020)), 
                     match.missing = TRUE, 
                     covs.formula = ~ I(lag(pcto.elig, 1:5)) +  I(lag(pv_marg, 1:5)) +
                       vbm_all + vbm_app + avr + cases_mn_centered, 
                     size.match = 5, qoi = "att" , outcome.var = "pcto.elig",
                     lead = 0, forbid.treatment.reversal = FALSE, 
                     use.diagonal.variance.matrix = TRUE)

pm.balance.vbm_noex.match <- get_covariate_balance(pm.out$att,
                                                   data = filter(d, !(pre_2020)),
                                                   covariates = c("cases_mn_centered","pv_marg","pcto.elig"),
                                                   plot = FALSE)
pm.balance.vbm_noex.match
pm.balance.vbm_noex.match["t_0","pcto.elig"] <- NA
pm.balance <- rbind.fill(pm.balance, cbind(data.frame(dv="vbm_noex", model="matched"), 
                                           value=apply(pm.balance.vbm_noex.match, 2, function(x) mean(x, na.rm=T))))

pe.out.vbm_noex.match <- PanelEstimate(sets=pm.out, data=filter(d, !(pre_2020)))
summary(pe.out.vbm_noex.match)

#VBM application: Eligible Turnout#
d <- mutate(d, geoid2=as.integer(geoid), yrcount2=as.integer(yrcount+1)) %>%
  ddply(.(geoid), transform, pre_2020=any(vbm_app[year<2020]))

pm.out <- PanelMatch(lag = 5, time.id = "yrcount2", unit.id = "geoid2", 
                     treatment = "vbm_app", refinement.method = "none", # no refinement 
                     data = filter(d, !(pre_2020)), 
                     match.missing = TRUE, 
                     covs.formula = ~ I(lag(pcto.elig, 1:5)) +  I(lag(pv_marg, 1:5)) +
                       vbm_all + vbm_noex + avr + cases_mn_centered, 
                     size.match = 5, qoi = "att" , outcome.var = "pcto.elig",
                     lead = 0, forbid.treatment.reversal = FALSE, 
                     use.diagonal.variance.matrix = TRUE)

pm.balance.vbm_app.null <- get_covariate_balance(pm.out$att,
                                                 data = filter(d, !(pre_2020)),
                                                 covariates = c("cases_mn_centered","pv_marg","pcto.elig"),
                                                 plot = FALSE)
pm.balance.vbm_app.null
pm.balance.vbm_app.null["t_0","pcto.elig"] <- NA
pm.balance <- rbind.fill(pm.balance, cbind(data.frame(dv="vbm_app", model="null"), 
                                           value=apply(pm.balance.vbm_app.null, 2, function(x) mean(x, na.rm=T))))

pm.out <- PanelMatch(lag = 5, time.id = "yrcount2", unit.id = "geoid2", 
                     treatment = "vbm_app", refinement.method = "mahalanobis", # use Mahalanobis distance 
                     data = filter(d, !(pre_2020)), 
                     match.missing = TRUE, 
                     covs.formula = ~ I(lag(pcto.elig, 1:5)) +  I(lag(pv_marg, 1:5)) +
                       vbm_all + vbm_noex + avr + cases_mn_centered, 
                     size.match = 5, qoi = "att" , outcome.var = "pcto.elig",
                     lead = 0, forbid.treatment.reversal = FALSE, 
                     use.diagonal.variance.matrix = TRUE)

pm.balance.vbm_app.match <- get_covariate_balance(pm.out$att,
                                                  data = filter(d, !(pre_2020)),
                                                  covariates = c("cases_mn_centered","pv_marg","pcto.elig"),
                                                  plot = FALSE)
pm.balance.vbm_app.match
pm.balance.vbm_app.match["t_0","pcto.elig"] <- NA
pm.balance <- rbind.fill(pm.balance, cbind(data.frame(dv="vbm_app", model="match"), 
                                           value=apply(pm.balance.vbm_app.match, 2, function(x) mean(x, na.rm=T))))

pe.out.vbm_app.match <- PanelEstimate(sets=pm.out, data=filter(d, !(pre_2020)))
summary(pe.out.vbm_app.match)

pm.balance <- rbind.fill(cbind(data.frame(var=colnames(pm.balance.vbm_all.null), dv="vbm_all", model="null"), 
                               value=apply(pm.balance.vbm_all.null, 2, function(x) mean(x, na.rm=T))),
                         cbind(data.frame(var=colnames(pm.balance.vbm_all.match), dv="vbm_all", model="matched"), 
                               value=apply(pm.balance.vbm_all.match, 2, function(x) mean(x, na.rm=T))),
                         cbind(data.frame(var=colnames(pm.balance.vbm_noex.null), dv="vbm_noex", model="null"), 
                               value=apply(pm.balance.vbm_noex.null, 2, function(x) mean(x, na.rm=T))),
                         cbind(data.frame(var=colnames(pm.balance.vbm_noex.match), dv="vbm_noex", model="matched"), 
                               value=apply(pm.balance.vbm_noex.match, 2, function(x) mean(x, na.rm=T))),
                         cbind(data.frame(var=colnames(pm.balance.vbm_app.null), dv="vbm_app", model="null"), 
                               value=apply(pm.balance.vbm_app.null, 2, function(x) mean(x, na.rm=T))),
                         cbind(data.frame(var=colnames(pm.balance.vbm_app.match), dv="vbm_app", model="matched"), 
                               value=apply(pm.balance.vbm_app.match, 2, function(x) mean(x, na.rm=T))))

write.csv(spread(pm.balance, model, value) %>% arrange(dv, var), "turnout_matching_balance_elig.csv")

pe.out <- rbind.fill(data.frame(summary(pe.out.vbm_all.match)$summary),
                     data.frame(summary(pe.out.vbm_noex.match)$summary),
                     data.frame(summary(pe.out.vbm_app.match)$summary))

write.csv(pe.out, "turnout_matching_estimates_elig.csv")


